counts <- read.delim(
file = here("data/raw/GBM_raw_gene_counts.csv"),
sep = " ",
row.names = 1,
check.names = F,
)
head(counts)[c(1:15, ncol(counts)-5:ncol(counts))]
We loaded 3589 cells and 23368 genes.
metadata <- read.delim(
file = here("data/raw/GBM_metadata.csv"),
sep = " ",
row.names = 1,
stringsAsFactors = TRUE,
)
head(metadata)
metadata <- metadata |>
select(!c(Sample.type, housekeeping_cluster, ends_with("color"))) |>
rename(Patient = Sample.name, Paper_cluster = Cluster_2d) |>
unite("Batch", Patient, Location, remove = FALSE) |>
relocate(Batch, Patient, Location, Selection) |>
mutate(Batch = as_factor(Batch))
head(metadata, n = 3)
table(metadata[c("Patient", "Location")])
## Location
## Patient Distant Periphery Tumor
## BT_S1 0 56 433
## BT_S2 0 446 723
## BT_S4 0 562 980
## BT_S6 57 125 207
patient_data <- read_excel(
here("data/raw/patient_data.xlsx"),
range = "A2:V6",
.name_repair = "unique_quiet"
) |>
rename(Patient = 1) |>
rename_with(~ gsub(" ", "_", .x, fixed = TRUE)) |>
rename_with(~ paste("Subtype", .x, sep = "_"), Classical:Proneural) |>
rename_with(~ paste(.x, "expressing_cell_fraction", sep = "_"), CD274:B2M)
patient_data
Where WT = Wildtype, M = Methylated, NM = Non methylated, NT = Not tested.
options(Seurat.object.assay.calcn = TRUE)
obj <- CreateSeuratObject(
counts = counts,
meta.data = metadata,
min.cells = 3,
min.features = 100,
)
We’re left with 3584 cells and 20721 genes.
VlnPlot(
obj,
features = c("nFeature_RNA", "nCount_RNA", "ERCC_reads"),
layer = "counts",
) &
labs(x = NULL) &
scale_x_discrete(labels = NULL, breaks = NULL)
VlnPlot(
obj,
features = "ERCC_to_non_ERCC",
layer = "counts",
y.max = 3,
) +
labs(x = NULL) +
scale_x_discrete(labels = NULL, breaks = NULL) +
guides(fill="none")
n_feature_cutoff = 200
n_count_cutoff = 50000
FeatureScatter(
obj,
feature1 = "nCount_RNA",
feature2 = "nFeature_RNA",
cols = "black"
) +
NoLegend() +
geom_hline(
aes(yintercept = n_feature_cutoff),
color = "red", linetype = "dashed",
) +
geom_vline(
aes(xintercept = n_count_cutoff),
color = "red", linetype = "dashed",
)
obj <- subset(obj, subset = nFeature_RNA > n_feature_cutoff & nCount_RNA > n_count_cutoff)
104 cells filtered out during QC.
plot_cell_cls <- function(reduction) {
(DimPlot(
obj,
reduction = reduction,
group.by = "predicted.subclass",
cols = DiscretePalette(n = length(unique(obj$predicted.subclass)), palette = "polychrome"),
pt.size = 0.5,
) |
FeaturePlot(
obj,
reduction = reduction,
features = "predicted.subclass.score",
pt.size = 0.5,
)) /
(DimPlot(
obj,
reduction = reduction,
group.by = "Batch",
cols = DiscretePalette(n = nlevels(obj$Batch), palette = "polychrome"),
pt.size = 0.5,
) |
DimPlot(
obj,
reduction = reduction,
group.by = "Selection",
cols = DiscretePalette(n = nlevels(obj$Selection), palette = "polychrome"),
pt.size = 0.5,
))
}
plot_cell_cls("ref.umap")
obj <- SCTransform(object = obj, verbose = FALSE) |>
RunPCA(verbose = FALSE)
ElbowPlot(object = obj, ndims = 50)
obj <- RunUMAP(object = obj, dims = 1:30, verbose = FALSE)
DimPlot(
object = obj,
reduction = "umap",
group.by = "Batch",
cols = DiscretePalette(n = nlevels(obj$Batch), palette = "polychrome"),
pt.size = 0.1,
) +
DimPlot(
object = obj,
reduction = "umap",
group.by = "Selection",
pt.size = 0.1,
)
obj[["RNA"]] <- split(obj[["RNA"]], f = obj$Batch)
obj <- SCTransform(object = obj, verbose = FALSE)
obj <- RunPCA(object = obj, verbose = FALSE)
ElbowPlot(object = obj, ndims = 50)
obj <- IntegrateLayers(
object = obj,
method = HarmonyIntegration,
normalization.method = "SCT",
orig.reduction = "pca", new.reduction = "harmony",
assay = "SCT",
verbose = FALSE
)
obj <- RunUMAP(object = obj, reduction = "harmony", dims = 1:30, reduction.name = "umap_harmony", verbose = F)
plot_cell_cls("umap_harmony")
saveRDS(
object = obj,
file = here("data/processed/01_initial_processing.Rds")
)
sessionInfo()
## R version 4.4.2 (2024-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.4
##
## Matrix products: default
## BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/Kiev
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices datasets utils methods base
##
## other attached packages:
## [1] humancortexref.SeuratData_1.0.0 here_1.0.1 Azimuth_0.5.0
## [4] shinyBS_0.61.1 Seurat_5.2.1 SeuratObject_5.0.2
## [7] sp_2.2-0 readxl_1.4.5 lubridate_1.9.4
## [10] forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
## [13] purrr_1.0.4 readr_2.1.5 tidyr_1.3.1
## [16] tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] fs_1.6.5 ProtGenerics_1.38.0 matrixStats_1.5.0
## [4] spatstat.sparse_3.1-0 bitops_1.0-9 DirichletMultinomial_1.48.0
## [7] TFBSTools_1.44.0 httr_1.4.7 RColorBrewer_1.1-3
## [10] tools_4.4.2 sctransform_0.4.1 R6_2.6.1
## [13] DT_0.33 lazyeval_0.2.2 uwot_0.2.3
## [16] rhdf5filters_1.18.1 withr_3.0.2 gridExtra_2.3
## [19] rematch_2.0.0 progressr_0.15.1 cli_3.6.4
## [22] Biobase_2.66.0 spatstat.explore_3.4-2 fastDummies_1.7.5
## [25] EnsDb.Hsapiens.v86_2.99.0 shinyjs_2.1.0 labeling_0.4.3
## [28] sass_0.4.9 spatstat.data_3.1-6 ggridges_0.5.6
## [31] pbapply_1.7-2 Rsamtools_2.22.0 R.utils_2.13.0
## [34] harmony_1.2.3 parallelly_1.43.0 BSgenome_1.74.0
## [37] rstudioapi_0.17.1 RSQLite_2.3.9 generics_0.1.3
## [40] BiocIO_1.16.0 gtools_3.9.5 ica_1.0-3
## [43] spatstat.random_3.3-3 googlesheets4_1.1.1 GO.db_3.20.0
## [46] Matrix_1.7-3 ggbeeswarm_0.7.2 S4Vectors_0.44.0
## [49] abind_1.4-8 R.methodsS3_1.8.2 lifecycle_1.0.4
## [52] yaml_2.3.10 SummarizedExperiment_1.36.0 glmGamPoi_1.18.0
## [55] rhdf5_2.50.2 SparseArray_1.6.2 Rtsne_0.17
## [58] grid_4.4.2 blob_1.2.4 promises_1.3.2
## [61] shinydashboard_0.7.2 crayon_1.5.3 pwalign_1.2.0
## [64] miniUI_0.1.1.1 lattice_0.22-6 cowplot_1.1.3
## [67] GenomicFeatures_1.58.0 annotate_1.84.0 KEGGREST_1.46.0
## [70] pillar_1.10.1 knitr_1.50 GenomicRanges_1.58.0
## [73] rjson_0.2.23 future.apply_1.11.3 codetools_0.2-20
## [76] fastmatch_1.1-6 glue_1.8.0 spatstat.univar_3.1-2
## [79] data.table_1.17.0 vctrs_0.6.5 png_0.1-8
## [82] spam_2.11-1 cellranger_1.1.0 gtable_0.3.6
## [85] poweRlaw_1.0.0 cachem_1.1.0 xfun_0.51
## [88] Signac_1.14.0 S4Arrays_1.6.0 mime_0.13
## [91] survival_3.8-3 gargle_1.5.2 RcppRoll_0.3.1
## [94] fitdistrplus_1.2-2 ROCR_1.0-11 nlme_3.1-167
## [97] bit64_4.6.0-1 RcppAnnoy_0.0.22 rprojroot_2.0.4
## [100] GenomeInfoDb_1.42.3 bslib_0.9.0 irlba_2.3.5.1
## [103] vipor_0.4.7 KernSmooth_2.23-26 SeuratDisk_0.0.0.9021
## [106] colorspace_2.1-1 seqLogo_1.72.0 BiocGenerics_0.52.0
## [109] DBI_1.2.3 ggrastr_1.0.2 tidyselect_1.2.1
## [112] bit_4.6.0 compiler_4.4.2 curl_6.2.2
## [115] hdf5r_1.3.12 DelayedArray_0.32.0 plotly_4.10.4
## [118] rtracklayer_1.66.0 scales_1.3.0 caTools_1.18.3
## [121] lmtest_0.9-40 rappdirs_0.3.3 digest_0.6.37
## [124] goftest_1.2-3 presto_1.0.0 spatstat.utils_3.1-3
## [127] rmarkdown_2.29 RhpcBLASctl_0.23-42 XVector_0.46.0
## [130] htmltools_0.5.8.1 pkgconfig_2.0.3 sparseMatrixStats_1.18.0
## [133] MatrixGenerics_1.18.1 fastmap_1.2.0 ensembldb_2.30.0
## [136] rlang_1.1.5 htmlwidgets_1.6.4 UCSC.utils_1.2.0
## [139] DelayedMatrixStats_1.28.1 shiny_1.10.0 farver_2.1.2
## [142] jquerylib_0.1.4 zoo_1.8-13 jsonlite_2.0.0
## [145] BiocParallel_1.40.2 R.oo_1.27.0 RCurl_1.98-1.17
## [148] magrittr_2.0.3 GenomeInfoDbData_1.2.13 dotCall64_1.2
## [151] patchwork_1.3.0 Rhdf5lib_1.28.0 munsell_0.5.1
## [154] Rcpp_1.0.14 reticulate_1.42.0 stringi_1.8.7
## [157] zlibbioc_1.52.0 MASS_7.3-65 plyr_1.8.9
## [160] parallel_4.4.2 listenv_0.9.1 ggrepel_0.9.6
## [163] deldir_2.0-4 CNEr_1.42.0 Biostrings_2.74.1
## [166] splines_4.4.2 tensor_1.5 hms_1.1.3
## [169] BSgenome.Hsapiens.UCSC.hg38_1.4.5 igraph_2.1.4 spatstat.geom_3.3-6
## [172] RcppHNSW_0.6.0 reshape2_1.4.4 stats4_4.4.2
## [175] TFMPvalue_0.0.9 XML_3.99-0.18 evaluate_1.0.3
## [178] renv_1.1.2 BiocManager_1.30.25 JASPAR2020_0.99.10
## [181] tzdb_0.5.0 httpuv_1.6.15 RANN_2.6.2
## [184] polyclip_1.10-7 future_1.34.0 SeuratData_0.2.2.9002
## [187] scattermore_1.2 xtable_1.8-4 restfulr_0.0.15
## [190] AnnotationFilter_1.30.0 RSpectra_0.16-2 later_1.4.1
## [193] googledrive_2.1.1 viridisLite_0.4.2 beeswarm_0.4.0
## [196] memoise_2.0.1 AnnotationDbi_1.68.0 GenomicAlignments_1.42.0
## [199] IRanges_2.40.1 cluster_2.1.8 timechange_0.3.0
## [202] globals_0.16.3